For this guiding question, I look only at 2024 data. Below, you’ll see on the each pilot practice (what schools hope to implement in the next 1-5 years) on the left and catalyst for innovation, or the reason(s) school leaders cite for innovating, across the top.
# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>%
select(starts_with("catalyst_")) %>%
select(-starts_with("catalyst_key"), -contains("_other"))
pilots <- full %>%
select(starts_with("pilot_")) %>%
select(-contains("_other")) remove_zero_variance <- function(df) {
# Apply function to each column
non_constant_cols <- df[, apply(df, 2, sd) != 0]
return(non_constant_cols)
}
catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed
reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.3, c1.cex = 0.3, number.cex = 0.2, diag = TRUE)Kind of clunky. I’m going to modify to fix the labels manually and then expand the figure to better see the correlations.
# rename catalysts
rename_catalyst_mapping <- setNames(
dictionary$clean_labels[dictionary$variable_name %in% colnames(catalysts)],
dictionary$variable_name[dictionary$variable_name %in% colnames(catalysts)]
)
# rename pilots
rename_pilot_mapping <- setNames(
dictionary$clean_labels[dictionary$variable_name %in% colnames(pilots)],
dictionary$variable_name[dictionary$variable_name %in% colnames(pilots)]
)
# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>%
select(starts_with("catalyst_")) %>%
select(-starts_with("catalyst_key"), -contains("_other")) %>%
rename_with(~ rename_catalyst_mapping[.x], .cols = everything())
pilots <- full %>%
select(starts_with("pilot_")) %>%
select(-contains("_other")) %>%
rename_with(~ rename_pilot_mapping[.x], .cols = everything())
catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed
reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.6, c1.cex = 0.3, number.cex = 0.2, diag = TRUE, cl.pos = "n")The following are related to each catalyst. Correlations above 0.15 are noted.
catalyst_all_years <- import(here("data/longitudinal", "longitudinal_data.csv")) %>%
select(year, school_id, starts_with("catalyst")) %>%
filter(year == 2021 | year == 2024)
catalyst_all_years_long <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_")From Cycle 2, I had started to create this graph. This shows catalyst selection across schools.
library(ggrepel)
catalyst_all_years_long <- catalyst_all_years_long %>%
group_by(catalyst, year) %>%
summarize(total_selected = sum(selected))
label_positions <- catalyst_all_years_long %>%
group_by(catalyst) %>%
summarize(year = 2021, selected = first(total_selected))
catalyst_all_years_long %>%
ggplot(aes(x = year, y = total_selected, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
labs(x = "Year",
y = "Number of Times Selected",
title = "Catalyst Selection by Year \n Across Schools") +
theme(legend.position = "none")Version using percentages is added here.
n_2021 = 232
n_2024 = 189
catalyst_all_years_long <- catalyst_all_years_long %>%
mutate(pct = case_when(year == 2021 ~ total_selected/n_2021,
year == 2024 ~ total_selected/n_2024))
label_positions <- catalyst_all_years_long %>%
group_by(catalyst) %>%
summarize(year = 2021, pct = first(pct))
catalyst_all_years_long %>%
ggplot(aes(x = year, y = pct, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
scale_y_continuous(labels = scales::percent_format()) +
labs(x = "Year",
y = "Percent of Times Selected",
title = "Catalyst Selection by Year \n Across Schools") +
theme(legend.position = "none")What about schools who responded to the survey both years? So, looking within schools? Let’s narrow the sample and check that out.
catalyst_all_years_within <- catalyst_all_years %>%
filter(duplicated(school_id) | duplicated(school_id, fromLast = TRUE)) %>%
select(-contains("_other"), -contains("_key")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
group_by(catalyst, year) %>%
summarize(total_selected = sum(selected))
label_positions <- catalyst_all_years_within %>%
group_by(catalyst) %>%
summarize(year = 2021, selected = first(total_selected))
catalyst_all_years_within %>%
ggplot(aes(x = year, y = total_selected, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
labs(x = "Year",
y = "Number of Times Selected",
title = "Catalyst Selection by Year \n Within Schools") +
theme(legend.position = "none")Now, in percentage. The total value here will be different than in
the across graph since not ever school answered every year.
Only 82 did.
n_within = 82
catalyst_all_years_within <- catalyst_all_years_within %>%
mutate(pct = total_selected/n_within)
label_positions <- catalyst_all_years_within %>%
group_by(catalyst) %>%
summarize(year = 2021, pct = first(pct))
catalyst_all_years_within %>%
ggplot(aes(x = year, y = pct, color = catalyst)) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
scale_x_continuous(breaks = c(2021, 2024)) +
scale_y_continuous(labels = scales::percent_format()) +
labs(x = "Year",
y = "Percent of Times Selected",
title = "Catalyst Selection by Year \n Within Schools") +
theme(legend.position = "none")Create a version similar to the most added practices figures.
catalyst_all_years_within_change <- catalyst_all_years_within %>%
pivot_wider(names_from = year,
values_from = c(total_selected, pct))
catalyst_all_years_within_change %>%
ggplot(aes(x = total_selected_2021, xend = total_selected_2024, y = reorder(catalyst, total_selected_2024), yend = catalyst)) +
geom_segment(color = "black", linetype = "dotted") +
geom_point(aes(x = total_selected_2021), color = "red") +
geom_point(aes(x = total_selected_2024), color = "blue") +
guides(col = guide_legend(nrow = 1, title = NULL)) +
bar_x_scale_count +
geom_text(
aes(x = (total_selected_2021 + total_selected_2024)/2, label = paste("Δ =", total_selected_2024 - total_selected_2021), color = factor(sign(total_selected_2024 - total_selected_2021))),
nudge_y = .3,
hjust = 0,
show.legend = FALSE
) +
scale_color_manual(
values = c("red", "blue"),
labels = c("2021", "2024")
) +
labs(
y = "Catalysts",
x = "Times Selected",
title = "Catalyst Selection from 2021 to 2024 Within Schools"
) +
theme(
panel.grid.major.y = element_blank(),
legend.position = "bottom",
axis.text.y = element_text(size = rel(0.6))
)How many schools selected just one catalyst in particular?
one_cat <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
mutate(cat_select = rowSums(across(3:11))) %>%
filter(cat_select == 1) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
filter(selected == 1) %>%
group_by(year, catalyst) %>%
summarise(count = n())one_cat %>%
ggplot(aes(x = count, y = catalyst, fill = as.factor(year))) +
geom_col(position = "dodge") +
scale_fill_manual(values = transcend_cols) +
labs(title = "Solo-Select Catalysts",
x = "Catalyst",
y = "Count",
legend = "Year")Also going to represent this information here in a change plot. Note, this is not going to be within schools since there is only once school that selected one catalyst each year. Also note, covid and student_agency were exclusive to 2021.
one_cat %>%
pivot_wider(names_from = "year",
values_from = "count") %>%
ggplot(aes(x = `2021`, xend = `2024`, y = reorder(catalyst, `2024`), yend = catalyst)) +
geom_segment(color = "black", linetype = "dotted") +
geom_point(aes(x = `2021`), color = "red") +
geom_point(aes(x = `2024`), color = "blue") +
geom_point(x = 1, y = "internal", color = "purple") +
geom_point(x = 1, y = "external", color = "purple") +
guides(col = guide_legend(nrow = 1, title = NULL)) +
# bar_x_scale_count +
geom_text(
aes(x = (`2021` + `2024`)/2 -1, label = paste("Δ =", `2024` - `2021`), color = factor(sign(`2024` - `2021`))),
nudge_y = .3,
hjust = 0,
show.legend = FALSE
) +
labs(
y = "Catalysts",
x = "Times Selected",
title = "Solo-Select Catalyst Selection \nfrom 2021 to 2024 Across Schools"
) +
theme(
panel.grid.major.y = element_blank(),
legend.position = "bottom",
axis.text.y = element_text(size = rel(0.6))
)cat_by_year <- catalyst_all_years %>%
select(year, school_id, starts_with("catalyst_key"), -contains("_other")) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst_key",
values_to = "selected",
names_prefix = "catalyst_key_") %>%
pivot_wider(names_from = year,
values_from = selected) %>%
na.omit() %>% #omit schools that only answered one year
mutate(selected = case_when(`2021` == 0 & `2024` == 0 ~ "neither year",
`2021` == 0 & `2024` == 1 ~ "added",
`2021` == 1 & `2024` == 0 ~ "dropped",
`2021` == 1 & `2024` == 1 ~ "both years")) %>%
group_by(catalyst_key, selected) %>%
summarise(n = n())cat_by_year %>%
ggplot(aes(x = n, y = catalyst_key, fill = selected)) +
geom_col(position = "dodge") +
scale_fill_manual(values = transcend_cols) +
labs(title = "Catalyst Key Selection for Schools with Both Years of Data",
x = "Catalyst",
y = "Count",
legend = "Status")This is from Cycle 2. What if we looked at schools who only selected one catalyst?
one_cat_change <- catalyst_all_years %>%
select(-contains("_other"), -contains("_key")) %>%
mutate(cat_select = rowSums(across(3:11))) %>%
filter(cat_select == 1) %>%
pivot_longer(cols = contains("catalyst"),
names_to = "catalyst",
values_to = "selected",
names_prefix = "catalyst_") %>%
filter(selected == 1)Looks like there was just one school who meets this criteria. This school went from focusing on inequity to demographics.
2024 only
First, am curious how the free response category responded. I generated a wordcloud of these for barriers below.
library(wordcloud)
library(tm)
responses <- barriers$barrier_other_text
# Create a text corpus
corpus <- Corpus(VectorSource(responses))
# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lower case
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english")) # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace) # Strip whitespace
# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)
# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)
# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))While we’re here, I’m also including a wordcloud for the catalysts for innovation here.
responses <- full$catalyst_other_text
# Create a text corpus
corpus <- Corpus(VectorSource(responses))
# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lower case
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english")) # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace) # Strip whitespace
# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)
# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)
# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))Back to barriers –
For the rest of the options, here is what leaders selected.
barriers <- barriers %>%
select(-contains("_other")) %>%
pivot_longer(cols = contains("barrier"),
names_to = "barrier",
values_to = "selected",
names_prefix = "barrier_") %>%
filter(selected == 1) %>%
group_by(barrier) %>%
summarize(n = n())barriers %>%
ggplot(aes(reorder(barrier, n), n)) +
geom_col(aes(fill = barrier)) +
scale_fill_manual(values = c(transcend_cols, transcend_cols2)) +
labs(title = "2024 Barriers to Sustainability",
x = "Barrier",
y = "Count") +
theme(legend.position = "none") +
coord_flip()